library("dplyr")
library("haven")

## Set the working directory to the Dataverse folder before running;
## the data path below is relative to it.

# Load the raw survey. load() restores the saved objects into the current
# environment and returns their names; this file is expected to supply a data
# object called `table`.
loaded_objects <- load("Harris_Data/Harris 2013 Public Opinion Survey, study no. 43567/harris_s43567.RData")

# Without this check a missing object would make `table` below resolve to
# base::table() (or to a stale `table` left over from an earlier script), and
# the problem would only surface later as a confusing error or as wrong data.
if (!"table" %in% loaded_objects) {
  stop("Expected an object named 'table' in harris_s43567.RData; found: ",
       paste(loaded_objects, collapse = ", "),
       call. = FALSE)
}
survey <- table

### fix everything below for this survey 

# Respondent identifier (pid): RESPID stored as text
survey$pid <- survey$RESPID %>% as.character()

# Study number (study) and study year (year): constant for every row
survey$study <- "43567"
survey$year <- 2013

# Geographic data (urban): value labels of Q1565 converted to plain text
survey$urban <- survey$Q1565 %>%
  haven::as_factor() %>%
  as.character()
table(survey$urban)

# Geographic data (region): value labels of Q320 converted to plain text
survey$region <- survey$Q320 %>%
  as_factor() %>%
  as.character()
table(survey$region)

# Respondent head of household (hh): filled with NA in this script
survey$hh <- NA

# Increasing inequality (inequality): value labels of Q956_2 as plain text
survey$inequality <- survey$Q956_2 %>%
  as_factor() %>%
  as.character()
table(survey$inequality)

# Shorten the two answer labels; any value not listed here is left unchanged
inequality_labels <- c(
  "Yes, feel this way" = "Feel",
  "No, don't feel this way" = "Don't Feel"
)
survey$inequality <- dplyr::recode(survey$inequality, !!!inequality_labels)
table(survey$inequality)

# Inequality variable (inequality.variable): set to 1 for every row
survey$inequality.variable <- 1

# union (union.self, union.other): filled with NA in this script
survey$union.self <- NA
survey$union.other <- NA

# employment (employed) - head of house: filled with NA in this script
survey$employed <- NA

## employment - respondent
## Each Q406 item is converted from its value labels to text, and a "Yes"
## answer is replaced by the employment status that item stands for. Every
## other value (e.g. "No", NA) is kept unchanged. A single lookup table drives
## all ten items so each item/label pairing is written exactly once.
employment_labels <- c(
  Q406_1  = "Employed full time",
  Q406_2  = "Employed part time",
  Q406_3  = "Self employed",
  Q406_4  = "Unemployed, looking",
  Q406_5  = "Unemployed, not looking",
  Q406_6  = "Retired",
  Q406_7  = "Disabled/Illness",
  Q406_8  = "Student",
  Q406_9  = "Stay at home spouse",
  Q406_99 = "unknown"
)
for (employment_item in names(employment_labels)) {
  item_text <- as.character(as_factor(survey[[employment_item]]))
  # The columns are overwritten in place; later code compares them to "No".
  survey[[employment_item]] <- dplyr::recode(
    item_text,
    "Yes" = employment_labels[[employment_item]]
  )
}
table(survey$Q406_99) ## no unknowns

### The Q406 items were checked for mutual exclusivity - they are not.
### Employed full time overlaps with the other employed categories and with
### retired; there is no overlap with the unemployed categories or with
### disability; there is some overlap with student and stay at home spouse.
##
## employed.self is "No" only when the full-time, part-time and self-employed
## items are all "No". Built from the innermost test outwards: each step falls
## through to the previous result only where its own item is "No", so an NA in
## an item that gets consulted yields NA.
self_employed_flag <- ifelse(survey$Q406_3 == "No", "No", "Yes")
part_time_flag <- ifelse(survey$Q406_2 == "No", self_employed_flag, "Yes")
survey$employed.self <- ifelse(survey$Q406_1 == "No", part_time_flag, "Yes")
table(survey$employed.self)

## Cross-check of the "Yes" count against the individual items
not_full_time <- survey$Q406_1 == "No"
table(survey$Q406_1) ## 781
table(survey$Q406_2, survey$Q406_1) ## 213 employed part time, not full time
table(survey$Q406_3[not_full_time],
      survey$Q406_2[not_full_time]) ## 102 self employed
## not part time
## 1096 - matches

# Occupation (occupation): filled with NA in this script
survey$occupation <- NA

# Respondent's occupation (occupation.self): value labels of Q428 as text
survey$occupation.self <- survey$Q428 %>%
  as_factor() %>%
  as.character()

# Household size (hhsize): sum of the two numeric household counts
# NOTE(review): presumably Q368 counts adults and Q372 counts children, going
# by the column names below - confirm against the codebook.
table(survey$Q368)
survey$hh_above18 <- survey$Q368 %>% as.numeric()
table(survey$hh_above18)
table(survey$Q372)
survey$hh_under18 <- survey$Q372 %>% as.numeric()
survey$hhsize <- with(survey, hh_above18 + hh_under18)
table(survey$hhsize)

# Education (educ): value labels of Q437 as text
survey$educ <- survey$Q437 %>%
  as_factor() %>%
  as.character()
table(survey$educ)

# Household income (income): value labels of Q462 as text
survey$income <- survey$Q462 %>%
  as_factor() %>%
  as.character()

# Age: Q280 converted directly to text (no value labels applied)
class(survey$Q280)
survey$age <- survey$Q280 %>% as.character()

# Race: value labels of Q485 as text
survey$race <- survey$Q485 %>%
  as_factor() %>%
  as.character()

# Politics (party): value labels of Q1500 as text
table(survey$Q1500)
survey$party <- survey$Q1500 %>%
  as_factor() %>%
  as.character()
table(survey$party)

# Politics (ideology): value labels of Q1520 as text
table(survey$Q1520)
survey$ideology <- survey$Q1520 %>%
  as_factor() %>%
  as.character()

# Gender: value labels of Q268 as text
table(survey$Q268)
survey$gender <- survey$Q268 %>%
  as_factor() %>%
  as.character()

table(survey$gender)

# Religion: filled with NA in this script
survey$religion <- NA

# Factual items (factual1-3): filled with NA in this script
survey$factual1 <- NA
survey$factual2 <- NA
survey$factual3 <- NA

## Alienation index items: value labels of Q956_1, Q956_3 and Q956_4 as text
survey$dontcare <- survey$Q956_1 %>%
  as_factor() %>%
  as.character()
survey$dontcount <- survey$Q956_3 %>%
  as_factor() %>%
  as.character()
survey$leftout <- survey$Q956_4 %>%
  as_factor() %>%
  as.character()

## Question placement: the same value for every row
survey$question_place <- "before party"

# Subset: keep only the harmonized columns, in this order
harmonized_columns <- c(
  "pid", "study", "year", "urban", "region", "hh",
  "inequality", "inequality.variable",
  "union.self", "union.other",
  "employed", "employed.self",
  "occupation", "occupation.self",
  "hhsize", "educ", "income",
  "age", "race", "party", "ideology", "gender", "religion",
  "factual1", "factual2", "factual3",
  "dontcare", "dontcount", "leftout",
  "question_place"
)
survey_43567 <- survey[, harmonized_columns]

# Save file (left disabled)
#saveRDS(survey_43567, file = "Harris_Data/survey_43567.rds")
